Load Libraries
# Load required libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(skimr)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(naniar)
##
## Attaching package: 'naniar'
##
## The following object is masked from 'package:skimr':
##
## n_complete
library(ggplot2)
library(corrplot)
## corrplot 0.95 loaded
library(broom)
library(countrycode)
Data Wrangling Functions
load_energy_data <- function(url = "https://raw.githubusercontent.com/owid/energy-data/master/owid-energy-data.csv") {
read_csv(url)
}
filter_energy_data <- function(df) {
df %>%
filter(!str_starts(iso_code, "OWID"), !is.na(country), !is.na(year)) %>%
filter(year >= 2000) %>%
select(where(~ sum(is.na(.)) < 0.4 * nrow(df))) %>%
filter(!is.na(population), !is.na(gdp))
}
select_energy_columns <- function(df) {
df %>%
select(
country, iso_code, year, population, gdp,
electricity_generation,
renewables_electricity, fossil_electricity,
solar_electricity, wind_electricity, hydro_electricity,
renewables_share_elec, coal_share_elec, gas_share_elec, oil_share_elec
)
}
add_normalized_metrics <- function(df) {
df %>%
mutate(
electricity_per_capita = electricity_generation / population,
renewables_per_capita = renewables_electricity / population,
fossil_per_capita = fossil_electricity / population,
electricity_per_gdp = electricity_generation / gdp,
gdp_per_electricity = gdp / (electricity_generation + 1)
)
}
add_log_transforms <- function(df) {
df %>%
mutate(
log_gdp = log(gdp + 1),
log_population = log(population + 1),
log_electricity = log(electricity_generation + 1)
)
}
na_percentage <- function(df) {
sapply(df, function(col) round(mean(is.na(col)) * 100, 2))
}
add_energy_ratios <- function(df) {
df %>%
mutate(
fossil_to_renewable_ratio = fossil_electricity / (renewables_electricity + 1),
fossil_share_elec = fossil_electricity / (electricity_generation + 1),
solar_share = solar_electricity / (renewables_electricity + 1),
wind_share = wind_electricity / (renewables_electricity + 1),
hydro_share = hydro_electricity / (renewables_electricity + 1)
)
}
add_classification_flags <- function(df) {
df %>%
mutate(
high_renewable = if_else(renewables_share_elec > 50, 1, 0),
transitioning = if_else(renewables_share_elec > fossil_share_elec, 1, 0)
)
}
transform_energy_data <- function(df) {
df %>%
add_normalized_metrics() %>%
add_log_transforms() %>%
add_energy_ratios() %>%
add_classification_flags()
}
Load Dataset 2 - Electricity Generation Data
# Load and process
energy_raw <- load_energy_data()
## Rows: 21812 Columns: 130
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): country, iso_code
## dbl (128): year, population, gdp, biofuel_cons_change_pct, biofuel_cons_chan...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
cat("Number of rows in Raw Dataset :", nrow(energy_raw),'\n')
## Number of rows in Raw Dataset : 21812
cat("Number of columns in Raw Dataset :", ncol(energy_raw),'\n')
## Number of columns in Raw Dataset : 130
energy_clean <- filter_energy_data(energy_raw)
cat("Number of rows in Filtered Dataset :", nrow(energy_clean),'\n')
## Number of rows in Filtered Dataset : 3795
cat("Number of columns in Filtered Dataset :", ncol(energy_clean),'\n')
## Number of columns in Filtered Dataset : 130
energy_selected <- select_energy_columns(energy_clean)
cat("Number of columns in Dataset after dropping unnecessary columns:", ncol(energy_selected),'\n')
## Number of columns in Dataset after dropping unnecessary columns: 15
energy_transformed <- transform_energy_data(energy_selected)
# Add continent info
energy_transformed <- energy_transformed %>%
mutate(continent = countrycode(country, "country.name", "continent"))
cat("Number of columns in Dataset after creating new columns and transformations:", ncol(energy_transformed),'\n')
## Number of columns in Dataset after creating new columns and transformations: 31
Check for Duplicates and Missing Values
### Checking for duplicated and % of missing values
energy_transformed %>% filter(duplicated(.))
### Checking for missing values
print("Percentage of Missing Values left :")
## [1] "Percentage of Missing Values left :"
na_percentage(energy_transformed)
## country iso_code year
## 0.00 0.00 0.00
## population gdp electricity_generation
## 0.00 0.00 0.00
## renewables_electricity fossil_electricity solar_electricity
## 0.00 0.00 0.13
## wind_electricity hydro_electricity renewables_share_elec
## 0.00 1.92 0.13
## coal_share_elec gas_share_elec oil_share_elec
## 0.13 0.71 0.13
## electricity_per_capita renewables_per_capita fossil_per_capita
## 0.00 0.00 0.00
## electricity_per_gdp gdp_per_electricity log_gdp
## 0.00 0.00 0.00
## log_population log_electricity fossil_to_renewable_ratio
## 0.00 0.00 0.00
## fossil_share_elec solar_share wind_share
## 0.00 0.13 0.00
## hydro_share high_renewable transitioning
## 1.92 0.13 0.13
## continent
## 0.00
Some latest stats about countries about renewable electricity
# Filter for latest year
latest_year <- max(energy_transformed$year, na.rm = TRUE)
latest_data <- energy_transformed %>% filter(year == latest_year)
# Get top countries for different metrics
top_renewables <- latest_data %>% arrange(desc(renewables_electricity)) %>% slice(1)
top_generation <- latest_data %>% arrange(desc(electricity_generation)) %>% slice(1)
top_per_capita <- latest_data %>% arrange(desc(electricity_per_capita)) %>% slice(1)
top_gdp <- latest_data %>% arrange(desc(gdp)) %>% slice(1)
top_renew_share <- latest_data %>% arrange(desc(renewables_share_elec)) %>% slice(1)
# Print the results
cat(glue::glue("Year considered: {latest_year}\n"))
## Year considered: 2022
cat(glue::glue(" Country with most renewable electricity: {top_renewables$country} ({round(top_renewables$renewables_electricity, 2)} TWh)\n"))
## Country with most renewable electricity: China (2670.18 TWh)
cat(glue::glue(" Country with highest electricity generation: {top_generation$country} ({round(top_generation$electricity_generation, 2)} TWh)\n"))
## Country with highest electricity generation: China (8848.73 TWh)
cat(glue::glue(" Country with highest GDP: {top_gdp$country} (${format(round(top_gdp$gdp, 0), big.mark=',')})\n"))
## Country with highest GDP: China ($2.696602e+13)
cat(glue::glue(" Country with highest renewables share: {top_renew_share$country} ({round(top_renew_share$renewables_share_elec, 2)}%)\n"))
## Country with highest renewables share: Albania (100%)
Load CO2 and Clean Data - CO2 Emissions Dataset -1
co2_data <- read_csv("https://raw.githubusercontent.com/owid/co2-data/master/owid-co2-data.csv")
## Rows: 50191 Columns: 79
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): country, iso_code
## dbl (77): year, population, gdp, cement_co2, cement_co2_per_capita, co2, co2...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
filter_co2_data <- function(df) {
df %>%
filter(!str_starts(iso_code, "OWID"), !is.na(country), !is.na(year)) %>%
filter(year >= 2000) %>%
select(where(~ sum(is.na(.)) < 0.4 * nrow(df))) %>%
filter(!is.na(population), !is.na(gdp))
}
co2_data = filter_co2_data(co2_data)
co2_data = co2_data %>% select(-c(cement_co2,cement_co2_per_capita,co2_growth_abs,co2_growth_prct,co2_including_luc_growth_abs,co2_including_luc_growth_prct,cumulative_cement_co2,cumulative_co2_including_luc,cumulative_luc_co2,flaring_co2,flaring_co2_per_capita,cumulative_flaring_co2,share_global_flaring_co2,share_global_cumulative_flaring_co2))
#COâ‚‚ emissions year-over-year change
co2_data = co2_data %>%
group_by(country) %>%
mutate(co2_pct_change = (co2 - lag(co2)) / lag(co2) * 100)
#Total COâ‚‚ from fossil fuel types
co2_data = co2_data %>%
mutate(fossil_fuel_co2 = coal_co2 + oil_co2 + gas_co2)
#log gdp
co2_data = co2_data %>% mutate(log_gdp = log(gdp))
#log co2
co2_data = co2_data %>% mutate(log_co2 = log(co2))
Merge Two Datasets to Single one - Using Country and Year as
keys
co2_transformed <- co2_data
co2_transformed <- co2_transformed %>%
select(-iso_code, -population, -gdp, -log_gdp)
#
energy_co2_merged <- left_join(
energy_transformed,
co2_transformed,
by = c("country", "year")
)
get_co2_electricity_ratio <- function(df) {
df %>%
mutate(
co2_per_kwh = co2 / electricity_generation)
}
energy_co2_merged <- get_co2_electricity_ratio(energy_co2_merged)
#write_csv(energy_co2_merged, "/Users/manasamangipudi/Desktop/Semester-3/DataWrangling/Project/data/energy_co2_data_merged.csv")
energy_co2_merged
Exploratory Data Analysis
CO2 Dataset
CO2 Emissions vs Population - How GDP impacts it
library(ggrepel)
ggplot(energy_co2_merged %>% filter(year==2022), aes(x = population, y = co2, size = gdp, label = country)) +
geom_point(alpha = 0.6, color = "steelblue") +
geom_text_repel(max.overlaps = 10, size = 5) +
scale_x_log10(labels = scales::comma) +
scale_y_log10(labels = scales::comma) +
labs(
title = "Fossil Fuel CO2 Emissions vs Population (Bubble Size = GDP)",
x = "Population (log scale)",
y = "CO2 Emissions (log scale)",
size = "GDP"
) +
theme_minimal()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 140 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

CO2 Emissions of Global Super Powers
ggplot(energy_co2_merged %>% filter(country %in% c("United States","India","China","Germany","Brazil")),aes(x=year,y=co2,color = country))+
geom_line(size=1)+
labs(title = "CO2 Emisions Over Time for Global Super Powers ",y="CO2 (Million Tonnes)")+
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Paris Agreement Effect on CO2 Emissions
co2_change <- energy_co2_merged %>%
filter(year %in% c(2015, 2022)) %>%
select(country, year, co2,iso_code) %>%
pivot_wider(names_from = year, values_from = co2, names_prefix = "co2_") %>%
mutate(co2_diff_2015_2022 = co2_2022 - co2_2015)
co2_change %>%
drop_na(co2_diff_2015_2022,iso_code) %>%
arrange(co2_diff_2015_2022) %>%
slice(1:15) %>%
ggplot(aes(x = reorder(country, co2_diff_2015_2022,decreasing = TRUE), y = co2_diff_2015_2022)) +
geom_col(fill = "steelblue") +
coord_flip() +
labs(
title = "Countries with Most Decrease in CO2 Emissions (2015–2022)",
x = "Country",
y = "Change in CO2 Emissions (Mt)"
) +
theme_minimal()

Electricity Generation Dataset :
Electricity Generation over Time by Continents
ggplot(energy_co2_merged, aes(x = continent, y = electricity_generation)) +
geom_boxplot(fill = "steelblue") +
theme_minimal() +
labs(title = "Electricity by Continent")

ggplot(energy_co2_merged, aes(x = continent, y = electricity_per_capita)) +
geom_boxplot(fill = "steelblue") +
theme_minimal() +
labs(title = "Electricity per Capita by Continent")

energy_co2_merged %>%
group_by(continent, year) %>%
summarise(
coal = sum(coal_share_elec, na.rm = TRUE),
gas = sum(gas_share_elec, na.rm = TRUE),
oil = sum(oil_share_elec, na.rm = TRUE)
) %>%
pivot_longer(cols = c("coal", "gas","oil"), names_to = "source", values_to = "electricity") %>%
ggplot(aes(x = year, y = electricity, color = source)) +
geom_line(size = 1) +
facet_wrap(~continent, scales = "free_y") +
theme_minimal() +
labs(title = "Growth of Coal, Gas, Oil by Continent", y = "Electricity (TWh)")
## `summarise()` has grouped output by 'continent'. You can override using the
## `.groups` argument.

energy_co2_merged %>%
group_by(continent, year) %>%
summarise(
wind = sum(wind_share, na.rm = TRUE),
solar = sum(solar_share, na.rm = TRUE),
hydro = sum(hydro_share, na.rm = TRUE)
) %>%
pivot_longer(cols = c("wind", "solar","hydro"), names_to = "source", values_to = "electricity") %>%
ggplot(aes(x = year, y = electricity, color = source)) +
geom_line(size = 1) +
facet_wrap(~continent, scales = "free_y") +
theme_minimal() +
labs(title = "Growth of Wind, Solar, Hydro by Continent", y = "Electricity (TWh)")
## `summarise()` has grouped output by 'continent'. You can override using the
## `.groups` argument.

Comparing Renewable and Fossil-fuel based Electricity
Generation
energy_by_continent <- energy_transformed %>%
group_by(continent, year) %>%
summarise(
renewables = sum(renewables_electricity, na.rm = TRUE),
fossil = sum(fossil_electricity, na.rm = TRUE)
) %>%
pivot_longer(cols = c("renewables", "fossil"), names_to = "source", values_to = "electricity") %>%
ungroup()
## `summarise()` has grouped output by 'continent'. You can override using the
## `.groups` argument.
ggplot(energy_by_continent, aes(x = year, y = electricity, color = source)) +
geom_line(size = 1) +
facet_wrap(~continent, scales = "free_y") +
theme_minimal() +
labs(title = "Renewable vs Non-Renewable Electricity Generation by Continent Over Time")

Electricity vs GDP for year 2022 - Size representing
Fossil_and_Renewable Ratio
ggplot(energy_co2_merged %>% filter(year==2022), aes(x = log_gdp, y = electricity_generation, size = fossil_to_renewable_ratio, label = country)) +
geom_point(alpha = 0.6, color = "steelblue") +
geom_text_repel(max.overlaps = 10, size = 5) +
scale_x_log10(labels = scales::comma) +
scale_y_log10(labels = scales::comma) +
theme_minimal()
## Warning: ggrepel: 155 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Linear Relationship between GDP and Electricity Generation
Merged Dataset - EDA
Global Trends of Electricity Generation and co2 emissions over
time
energy_co2_merged %>%
group_by(year) %>%
summarise(
fossil = mean(fossil_electricity, na.rm = TRUE),
co2 = mean(co2, na.rm = TRUE),
renewable = mean(renewables_electricity, na.rm = TRUE)
) %>%
pivot_longer(cols = c("fossil", "co2", "renewable"), names_to = "source", values_to = "share") %>%
ggplot(aes(x = year, y = share, color = source)) +
geom_line(size = 1.2) +
theme_minimal() +
labs(
title = "Global Trend of Electricity Generation and co2 over time",
x = "Year",
y = "Share of Electricity (%)",
color = "Source"
)

Continent-wise Trends of Electricity Generation and co2 emissions
over time
library(tidyverse)
energy_co2_merged %>%
group_by(continent, year) %>%
summarise(
co2 = mean(co2, na.rm = TRUE),
fossil = mean(fossil_electricity, na.rm = TRUE),
renew = mean(renewables_share_elec, na.rm = TRUE),
.groups = 'drop'
) %>%
pivot_longer(cols = c(co2, fossil, renew), names_to = "variable", values_to = "value") %>%
ggplot(aes(x = year, y = value, color = variable)) +
geom_line(size = 1) +
facet_wrap(~continent, scales = "free_y") +
theme_minimal() +
labs(
title = "CO2 Emissions, Fossil and Renewable Electricity Trends by Continent",
x = "Year",
y = "Electricity Share/Emissions",
color = "Metric"
)

Fossil Electricity and CO2 Emissions Linear Relationship
energy_co2_merged %>%
group_by(continent, year) %>%
summarise(
fossil_elec = mean(fossil_electricity, na.rm = TRUE),
co2 = mean(co2, na.rm = TRUE),
.groups = "drop"
) %>%
ggplot(aes(x = fossil_elec, y = co2)) +
geom_point(alpha = 0.6, color = "firebrick") +
geom_smooth(method = "lm", color = "black") +
facet_wrap(~continent, scales = "free") +
labs(
title = "Fossil Electricity vs CO2 Emissions by Continent",
x = "Fossil Electricity (TWh)",
y = "CO2 Emissions (Mt)"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

CO2 Emissions per unit of electricity generated over time
energy_co2_merged %>%
mutate(co2_per_twh = co2 /electricity_generation) %>%
group_by(continent, year) %>%
summarise(
co2_efficiency = mean(co2_per_twh, na.rm = TRUE),
.groups = "drop"
) %>%
ggplot(aes(x = year, y = co2_efficiency, color = continent)) +
geom_line(size = 1.2) +
labs(
title = "COâ‚‚ Emissions per Unit of Electricity Generated",
y = "COâ‚‚ / TWh",
x = "Year"
) +
theme_minimal()

GHG Emissions per unit of electricity generated over time
energy_co2_merged %>%
mutate(ghg_per_twh = total_ghg /electricity_generation) %>%
group_by(continent, year) %>%
summarise(
ghg_efficiency = mean(ghg_per_twh, na.rm = TRUE),
.groups = "drop"
) %>%
ggplot(aes(x = year, y = ghg_efficiency, color = continent)) +
geom_line(size = 1.2) +
labs(
title = "GHG Emissions per Unit of Electricity Generated",
y = "GHG / TWh",
x = "Year"
) +
theme_minimal()

Linear regression of CO2 against every other column sorted by
estimate
# 1. Filter for United States
us_data <- energy_co2_merged %>%
filter(country == "United States") %>%
select(-country, -iso_code, -continent, year) # Remove non-numeric/grouping vars
# 2. Remove columns with all NA or zero variance
us_data <- us_data %>%
select(where(is.numeric)) %>%
select(where(~ sum(!is.na(.)) > 0)) %>%
select(where(~ sd(., na.rm = TRUE) > 0))
# 3. Build models and extract p-values
results <- map_dfr(
setdiff(names(us_data), "co2"),
function(var) {
df <- us_data %>% select(co2, !!sym(var)) %>% drop_na()
if (nrow(df) < 10) return(NULL) # skip if not enough data
model <- lm(co2 ~ ., data = df)
tidy(model) %>%
filter(term != "(Intercept)") %>%
mutate(variable = var)
}
)
# 4. Print significant predictors (p >= 0.05)
significant <- results %>%
filter(p.value < 0.05) %>%
arrange(p.value)
head(significant %>% arrange(desc(estimate)))
Fossil Fuel vs CO2 Emissions in 2022 (By Population)
library(ggrepel)
ggplot(energy_co2_merged %>% filter(year==2022), aes(x = renewables_per_capita, y = co2, size = population, label = country)) +
geom_point(alpha = 0.6, color = "steelblue") +
geom_text_repel(max.overlaps = 5, size = 4) +
scale_x_log10(labels = scales::comma) +
scale_y_log10(labels = scales::comma) +
labs(
title = "Fossil Fuel CO2 Emissions vs Population (Bubble Size = Population)",
x = "Renewables_per_capita",
y = "CO2 Emissions (log scale)",
size = "Population"
) +
theme_minimal()
## Warning in scale_x_log10(labels = scales::comma): log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 134 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Global Scatter Plot: CO2 per Capita vs Renewables Share
energy_co2_merged <- energy_co2_merged %>%
mutate(co2_per_capita = co2 / population)
ggplot(energy_co2_merged, aes(x = renewables_share_elec, y = log(co2_per_capita))) +
geom_point(alpha = 0.4, color = "darkgreen") +
geom_smooth(method = "lm", color = "red") +
labs(
title = "Relationship between Renewable Electricity Share and CO2 Emissions per Capita",
x = "Renewables Share (%)",
y = "Log CO2 Emissions per Capita (tons)"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 28 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Global Regression Model
model_global <- lm(co2_per_capita ~ renewables_share_elec + gdp, data = energy_co2_merged)
summary(model_global)
##
## Call:
## lm(formula = co2_per_capita ~ renewables_share_elec + gdp, data = energy_co2_merged)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.362e-06 -3.485e-06 -8.430e-07 1.318e-06 6.067e-05
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.811e-06 1.471e-07 46.314 <2e-16 ***
## renewables_share_elec -6.285e-08 2.958e-09 -21.248 <2e-16 ***
## gdp 4.601e-19 5.126e-20 8.976 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.028e-06 on 3764 degrees of freedom
## (28 observations deleted due to missingness)
## Multiple R-squared: 0.1335, Adjusted R-squared: 0.133
## F-statistic: 289.9 on 2 and 3764 DF, p-value: < 2.2e-16
top5 <- c("Germany", "India", "Brazil", "United States", "China")
energy_co2_merged %>%
filter(country %in% top5) %>%
ggplot(aes(x = year, y = co2 / population, color = country)) +
geom_line(size = 1) +
labs(
title = "CO2 Emissions per Capita Over Time (Top 5 Economies)",
y = "CO2 per Capita (tons)",
x = "Year"
) +
theme_minimal()

top5 <- c("United States", "China", "India", "Germany", "Brazil")
energy_co2_merged %>%
filter(country %in% top5) %>%
ggplot(aes(x = year, y = renewables_share_elec, color = country)) +
geom_line(size = 1) +
labs(
title = "Renewable Electricity Share Over Time (Top 5 Economies)",
y = "Renewables Share (%)",
x = "Year"
) +
theme_minimal()

energy_co2_merged %>%
group_by(year) %>%
summarise(avg_co2_per_kwh = mean(co2 / electricity_generation, na.rm = TRUE)) %>%
ggplot(aes(x = year, y = avg_co2_per_kwh)) +
geom_line(color = "firebrick", size = 1.2) +
labs(
title = "Global Average CO2 Emissions per kWh Over Time",
x = "Year",
y = "CO2 per kWh"
) +
theme_minimal()

energy_co2_merged %>%
filter(year == max(year)) %>%
mutate(co2_per_kwh = co2 / electricity_generation) %>%
ggplot(aes(x = renewables_share_elec, y = co2_per_kwh)) +
geom_point(alpha = 0.6, color = "steelblue") +
geom_smooth(method = "lm", se = FALSE, color = "red") +
labs(
title = "Renewables Share vs COâ‚‚ per kWh (Latest Year)",
x = "Renewables Share (%)",
y = "COâ‚‚ per kWh"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
